# -*-coding:utf-8-*-
import re
from urllib.error import HTTPError
from urllib.request import urlopen
from urllib.parse import urlencode, quote
from bs4 import BeautifulSoup
from common import config
from entity.BookInfo import BookInfo

__author__ = 'Jason'

# Look up a book's metadata by title.
def query(title, docType):
    """Search for ``title`` and return a ``BookInfo`` built from the first hit.

    The title is reduced to its leading part (brackets removed, everything
    after the first separator dropped) before it is passed to
    ``getListHtml``. The first ``<a istitle="true">`` element of the result
    page supplies the record id for ``getDetailHtml``; the detail page is
    turned into a ``BookInfo`` by ``buildBookInfo``.

    Args:
        title: Title as supplied by the caller. It is stored unchanged on the
            returned object when a record is found.
        docType: Document type, passed through to ``getListHtml``.

    Returns:
        A populated ``BookInfo`` whose ``title`` is the original ``title``,
        or an empty ``BookInfo()`` when nothing was found or a page could not
        be fetched.
    """
    # Remove decorative brackets in one pass; their content is kept.
    bracket_free = title.translate(str.maketrans("", "", "[]【】《》"))

    # Keep only the text before the first separator. The last alternative is
    # an escaped "(": the previous unescaped "()" was an empty group, and since
    # Python 3.7 re.split() splits on empty matches, so element [0] was always
    # the empty string.
    qTitle = re.split(r"—|：|\.|_|-|（|\(", bracket_free)[0]

    bsListHtml = getListHtml(qTitle, docType)
    if not bsListHtml:
        # getListHtml returns "" when the request failed.
        return BookInfo()

    itemOne = bsListHtml.find("a", {"istitle": "true"})
    if itemOne is None:
        return BookInfo()

    book_id = itemOne.attrs.get("id")
    if not book_id:
        # Without a record id there is no detail page to fetch.
        return BookInfo()
    print(book_id)

    bookHtml = getDetailHtml(book_id)
    if not bookHtml:
        # getDetailHtml returns "" when the request failed.
        return BookInfo()

    book = buildBookInfo(bookHtml)
    book.title = title
    return book

# Fetch the search-result (list) page for a title.
def getListHtml(title, docType="图书", timeout=5):
    """Download and parse the search-result page for ``title``.

    The URL is ``config.wenjin_api1`` formatted with the URL-quoted title,
    the URL-quoted document type and the URL-quoted literal "全部字段"
    (presumably the "all fields" search scope; confirm against the API).

    Args:
        title: Search text.
        docType: Document type to search for.
        timeout: Socket timeout in seconds for the request.

    Returns:
        A ``BeautifulSoup`` document parsed with ``html.parser``, or ``""``
        when the request failed (the error is printed).
    """
    url = config.wenjin_api1.format(quote(title), quote(docType), quote("全部字段"))
    try:
        # The context manager closes the HTTP response even if read() fails.
        with urlopen(url, timeout=timeout) as response:
            payload = response.read()
    except OSError as e:
        # HTTPError and URLError derive from OSError, and so do socket
        # timeouts, so every network failure takes the same "" path instead
        # of only HTTP status errors.
        print(e)
        return ""
    return BeautifulSoup(payload, "html.parser")

# Fetch the detail page of a single book.
def getDetailHtml(bookId, timeout=5):
    """Download and parse the detail page for ``bookId``.

    The URL is ``config.wenjin_api2`` formatted with ``bookId``.

    Args:
        bookId: Record id inserted into the URL template.
        timeout: Socket timeout in seconds. The default matches the value
            ``getListHtml`` uses, so a stalled server cannot block forever.

    Returns:
        A ``BeautifulSoup`` document parsed with ``html.parser``, or ``""``
        when the request failed (the error is printed).
    """
    url = config.wenjin_api2.format(bookId)

    try:
        # The context manager closes the HTTP response even if read() fails.
        with urlopen(url, timeout=timeout) as response:
            payload = response.read()
    except OSError as e:
        # HTTPError and URLError derive from OSError, and so do socket
        # timeouts, so every network failure takes the same "" path instead
        # of only HTTP status errors.
        print(e)
        return ""
    return BeautifulSoup(payload, "html.parser")

# Build a BookInfo from a parsed detail page.
def buildBookInfo(bookHtml):
    """Extract the supported attributes from a detail page.

    Every ``<p>`` inside ``<div id="detail-info">`` is read as a
    "label: value" line. Labels known to ``mapAttrs`` are stored on the
    result under the mapped attribute name.

    Args:
        bookHtml: Parsed detail page as returned by ``getDetailHtml``. The
            empty string that function returns on failure is accepted.

    Returns:
        A ``BookInfo``. It is left empty when the page is missing or has no
        ``detail-info`` container.
    """
    book = BookInfo()

    # A failed fetch yields "", and an unexpected layout has no container;
    # both used to raise instead of producing an empty result.
    detail = bookHtml.find("div", {"id": "detail-info"}) if bookHtml else None
    if detail is None:
        return book

    # Classification mark: one or two capital letters (e.g. "TP312"), digits,
    # then any run of ".", "-", "/" or digits. The previous character class
    # "[\.-\\d/]" was an accidental range from "." to "\" plus a literal "d",
    # so it missed "-" and swallowed "=", ":" and capital letters.
    mark_pattern = re.compile(r'[A-Z]{1,2}\d+[.\-\d/]*')
    # NOTE(review): "," inside this class is a literal comma, so values that
    # contain commas are split as well. Confirm that this is intended.
    separator_pattern = re.compile(r'[:,：]')

    for paragraph in detail.find_all("p"):
        infoSrc = re.sub(r'[\r\t\n]', '', paragraph.get_text())
        g = separator_pattern.split(infoSrc)

        if "分类" in g:
            found = mark_pattern.search(infoSrc)
            setattr(book, mapAttrs("中图分类"), found.group(0) if found else "")
        elif len(g) >= 2:
            # The last two pieces are the label and its value. A line without
            # any separator has no label, so it is skipped; g[-2] would
            # otherwise wrap around to the value itself.
            attrName = mapAttrs(g[-2].strip())
            if attrName:
                setattr(book, attrName, g[-1].strip())

    # Keep only the part of the classification before any "=" suffix.
    sort_no = getattr(book, "sort_no", None)
    if sort_no:
        book.sort_no = sort_no.split("=")[0]
    return book

# Only these attributes are supported for now; in practice only the
# classification is really useful.
# Maps BookInfo attribute name -> label text used on the detail page.
# (Formerly named ``dict``, which shadowed the builtin for the whole module.)
_ATTR_LABELS = {
    "author": "所有责任者",
    "sort_no": "中图分类",
    "fl_title": "并列正题名",
    "group_title": "丛编题名",
    "lang": "语种",
}
# Reverse view (label -> attribute name) so each lookup is a single dict access.
_LABEL_TO_ATTR = {label: attr for attr, label in _ATTR_LABELS.items()}


def mapAttrs(val):
    """Return the ``BookInfo`` attribute name for the page label ``val``.

    Args:
        val: Label text, already stripped of surrounding whitespace.

    Returns:
        The attribute name, or ``""`` when the label is not supported.
    """
    return _LABEL_TO_ATTR.get(val, "")